Clustering - Sleep Recommendations

This notebook is used to produce results related to clustering of data from the fitbit vitals data loaded from the corresponding pickle files and using sleep efficiency labels to then further find cluster impurities, distrinution and good sleep reciepes

Importing Required Libraries

In [1]:
# Importing scientific libarires required for analysis and handling data
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()

# Importing libraries related to handling of files and directory
import os
import glob
import pickle
import random

# Importing utility functions from the code base
from utils.directory_utils import *
from utils.general_utils import *
from utils.sleep_utils import *
from data_preprocessor.get_user_data import *
from clustering_utils import *
from kmeans_dm import *

# Importing Machine Learning utilities
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from tslearn.clustering import TimeSeriesKMeans
from sklearn.decomposition import PCA
from statsmodels.tsa.seasonal import seasonal_decompose
from scipy.stats import boxcox
from scipy.spatial import distance
from tslearn.metrics import dtw, cdist_dtw
from sklearn.metrics import silhouette_score
from scipy.stats import entropy

Data

This section loads all different types of data from the pickle files that we have saved and then loads the relevant data into numpy array for further analysis

Heart Rate, Sleep, Calories and Activity Time Series Data

User Data Loader

In [4]:
# First we load the data for each user seperately from their own numpy array and then stack them to get the final array
numpy_array_directory = f'../data/data_numpy_arrays/'

heart_rate_ts_data = []
calories_ts_data = []
activity_label_ts_data = []
activity_percentages = []
sleep_effeciency_ratio = []
sleep_stages_summary = []

for user_name in get_subdirectory_nms(numpy_array_directory):
#     if user_name in  ['Meghna\\']:
#         continue
    user_directory = construct_path(numpy_array_directory, user_name)

    user_heart_rate_ts_data = np.load(construct_path(user_directory, f'heart_rate_ts_data.npy'))
    user_calories_ts_data = np.load(construct_path(user_directory, f'calories_ts_data.npy'))
    user_activity_label_ts_data = np.load(construct_path(user_directory, f'activity_label_ts_data.npy'))
    user_activity_percentages = np.load(construct_path(user_directory, f'activity_percentages.npy'))
    user_sleep_effeciency_ratio = np.load(construct_path(user_directory, f'sleep_efficiency_ratio.npy'))
    user_sleep_stages_summary = pd.read_csv(construct_path(user_directory, f'sleep_stages_summary.csv'))

    heart_rate_ts_data.append(user_heart_rate_ts_data)
    calories_ts_data.append(user_calories_ts_data)
    activity_label_ts_data.append(user_activity_label_ts_data)
    activity_percentages.append(user_activity_percentages)
    sleep_effeciency_ratio.append(user_sleep_effeciency_ratio)
    sleep_stages_summary.append(user_sleep_stages_summary)

heart_rate_ts_data = np.vstack(heart_rate_ts_data)[:, :]
calories_ts_data = np.vstack(calories_ts_data)[:, :]
activity_label_ts_data = np.vstack(activity_label_ts_data)[:, :]
activity_percentages = np.vstack(activity_percentages)
sleep_effeciency_ratio = np.hstack(sleep_effeciency_ratio)
sleep_stages_summary = pd.concat(sleep_stages_summary)
In [3]:
activity_percentages = activity_percentages * 1440 / 100

Check for the shape of all the arrays and dataframes

In [5]:
# Check for the shape of all the arrays and dataframes
heart_rate_ts_data.shape, calories_ts_data.shape, activity_label_ts_data.shape, sleep_effeciency_ratio.shape, sleep_stages_summary.shape
Out[5]:
((272, 1440), (272, 1440), (272, 1440), (272,), (272, 4))
In [6]:
# Make sure activity value does not have a nan field (not sure how we would fill this)
print(np.isnan(activity_label_ts_data).any())
# Check that no nans in any of the data
np.isnan(heart_rate_ts_data).any(), np.isnan(calories_ts_data).any()
False
Out[6]:
(False, False)

Transformations

This section uses different ways to transform the original time series data

This section will essentially find the trends from the original data

In [7]:
trend_window_length = 10
In [8]:
heart_trends = []
counter = 0
for day in heart_rate_ts_data:
    counter += 1
    result = seasonal_decompose(day, model='additive', freq=trend_window_length, extrapolate_trend='freq')
    heart_trends.append(result.trend)
heart_trends = np.array(heart_trends)
heart_trends = remove_nans_from_array(heart_trends)
# Make sure the shape is same and there are no nan values
heart_trends.shape, np.isnan(heart_trends).any()
Out[8]:
((272, 1440), False)
In [9]:
# plotting heart trends to asses the fit to the overall data
plt.figure(figsize=(10, 5))
plt.plot(heart_rate_ts_data[0, :], lw=2, label='Original Heart Rate')
plt.plot(heart_trends[0, :], color='r', lw=2, label='Decomposed Heart Trends')
plt.xlabel('Minute')
plt.ylabel('BPM')
plt.legend()
Out[9]:
<matplotlib.legend.Legend at 0x21b856d6160>
In [10]:
calories_trends = []
for day in calories_ts_data:
    result = seasonal_decompose(day, model='additive', freq=trend_window_length, extrapolate_trend='freq')
    calories_trends.append(result.trend)
calories_trends = np.array(calories_trends)
calories_trends = remove_nans_from_array(calories_trends)
# Make sure the shape is same and there are no nan values
calories_trends.shape, np.isnan(calories_trends).any()
Out[10]:
((272, 1440), False)
In [11]:
# plotting caloires trends to asses the fit to the overall data
plt.figure(figsize=(10, 5))
plt.plot(calories_ts_data[0, :], lw=2, label='Original Calories Burned')
plt.plot(calories_trends[0, :], color='r', lw=2, label='Decomposed Calories Burned Trends')
plt.xlabel('Minute')
plt.ylabel('Calories Burned')
plt.legend()
Out[11]:
<matplotlib.legend.Legend at 0x21b85781860>

Chipping the Data

This section chips away some heart data

In [12]:
heart_trends = heart_trends[:, 480:1200]
calories_trends = calories_trends[:, 480:1200]
heart_trends.shape, calories_trends.shape
Out[12]:
((272, 720), (272, 720))

Dimensionality Reduction

This section will reduce the dimensions of the arrays so that we can easily apply different clustering techniques on them

In [13]:
mean_window_length = 10
In [14]:
# Reduce the dimension of the arrays
reduced_heart_trends = reduce_time_series_dimension(heart_trends, mean_window_length, hours=12)
reduced_calories_trends = reduce_time_series_dimension(calories_trends, mean_window_length, hours=12)
# Check for the shape of the arrays
reduced_heart_trends.shape, reduced_calories_trends.shape
Out[14]:
((272, 72), (272, 72))

Sleep Labels

In this section of the notebook we try to find the optimal boundary for constructing the sleep labels using different techniques

In [34]:
# Constructing a histogram plot for the sleep efficiency ratio.
# Sleep Efficiency Ratio is found as total_time_asleep / total_time_in_bed
sns.distplot(sleep_effeciency_ratio, kde=False)
plt.xlabel('Sleep Efficiency')
plt.ylabel('Frequency')
plt.title('Distribution of sleep efficiency of all subjects')
Out[34]:
Text(0.5, 1.0, 'Distribution of sleep efficiency of all subjects')
In [14]:
# Constructing a histogram plot for the different sleep stages.
fig, ax = plt.subplots(2, 2, figsize=(15, 10))
sns.distplot(sleep_stages_summary['wake'], ax = ax[0, 0])
ax[0, 0].set_xlabel('Minutes Awake')
ax[0, 0].set_ylabel('Frequency')
ax[0, 0].set_title('Minutes Awake Histogram')

sns.distplot(sleep_stages_summary['light'], ax = ax[0, 1])
ax[0, 1].set_xlabel('Minutes in Light Sleep')
ax[0, 1].set_ylabel('Frequency')
ax[0, 1].set_title('Minutes in Light Sleep Histogram')

sns.distplot(sleep_stages_summary['rem'], ax = ax[1, 0])
ax[1, 0].set_xlabel('Minutes in Rem Sleep')
ax[1, 0].set_ylabel('Frequency')
ax[1, 0].set_title('Minutes in REM Sleep Histogram')

sns.distplot(sleep_stages_summary['deep'], ax = ax[1, 1])
ax[1, 1].set_xlabel('Minutes in Deep Sleep')
ax[1, 1].set_ylabel('Frequency')
ax[1, 1].set_title('Minutes in Deep Sleep Histogram')
Out[14]:
Text(0.5, 1.0, 'Minutes in Deep Sleep Histogram')

Gap Definition For Sleep Efficiency

Create a gap of certain length: Which will be a parameter

Example: 0.05 - 0.875 and above, 0.825 and below

In [15]:
final_sleep_labels = sleep_effeciency_ratio > 0.89
sns.barplot(['Poor Sleep', 'Good Sleep'], [np.sum(~final_sleep_labels), np.sum(final_sleep_labels)], hue=[True, True])
plt.legend([])
plt.title('Number of Records v/s Sleep Class')
plt.ylabel('Number of Records')
Out[15]:
Text(0, 0.5, 'Number of Records')

HeatMap for Euclidean and DTW Distances

In [16]:
good_sleep_heart_trends = reduced_heart_trends[final_sleep_labels]
poor_sleep_heart_trends = reduced_heart_trends[~final_sleep_labels]
print(good_sleep_heart_trends.shape, poor_sleep_heart_trends.shape)
ordered_heart_trends = np.vstack((good_sleep_heart_trends, poor_sleep_heart_trends))
print(ordered_heart_trends.shape)
(137, 72) (135, 72)
(272, 72)
In [17]:
good_sleep_calories_trends = reduced_calories_trends[final_sleep_labels]
poor_sleep_calories_trends = reduced_calories_trends[~final_sleep_labels]
print(good_sleep_calories_trends.shape, poor_sleep_calories_trends.shape)
ordered_calories_trends = np.vstack((good_sleep_calories_trends, poor_sleep_calories_trends))
print(ordered_calories_trends.shape)
(137, 72) (135, 72)
(272, 72)
In [19]:
%%time
dtw_dist_heart = cdist_dtw(ordered_heart_trends)
dtw_dist_calories = cdist_dtw(ordered_calories_trends)
Wall time: 10min 8s
In [20]:
%%time
euc_dist_heart = distance.cdist(ordered_heart_trends, ordered_heart_trends)
euc_dist_calories = distance.cdist(ordered_calories_trends, ordered_calories_trends)
Wall time: 19 ms
In [21]:
m_dist_heart = distance.cdist(ordered_heart_trends, ordered_heart_trends, 'mahalanobis')
m_dist_calories = distance.cdist(ordered_calories_trends, ordered_calories_trends, 'mahalanobis')
l1_dist_heart = distance.cdist(ordered_heart_trends, ordered_heart_trends, 'minkowski', p=1)
l1_dist_calories = distance.cdist(ordered_calories_trends, ordered_calories_trends, 'minkowski', p=1)
In [26]:
cor_dist_heart = distance.cdist(ordered_heart_trends, ordered_heart_trends, 'correlation')
cor_dist_calories = distance.cdist(ordered_calories_trends, ordered_calories_trends, 'correlation')
In [79]:
fig, ax = plt.subplots(1, 2, figsize=(15, 5))
sns.heatmap(dtw_dist_heart, xticklabels=137, yticklabels=137, ax=ax[0])
ax[0].set_title('DTW Distance Cross Matrix for Heart Trends')
sns.heatmap(dtw_dist_calories, xticklabels=137, yticklabels=137, ax=ax[1])
ax[1].set_title('DTW Distance Cross Matrix for Calories Trends')
Out[79]:
Text(0.5, 1.0, 'DTW Distance Cross Matrix for Calories Trends')
In [78]:
fig, ax = plt.subplots(1, 2, figsize=(15, 5))
sns.heatmap(euc_dist_heart, xticklabels=137, yticklabels=137, ax=ax[0])
ax[0].set_title('L-2 Norm Distance Cross Matrix for Heart Trends')
sns.heatmap(euc_dist_calories, xticklabels=137, yticklabels=137, ax=ax[1])
ax[1].set_title('L-2 Norm Distance Cross Matrix for Calories Trends')
Out[78]:
Text(0.5, 1.0, 'L-2 Norm Distance Cross Matrix for Calories Trends')
In [74]:
fig, ax = plt.subplots(1, 2, figsize=(15, 5))
sns.heatmap(m_dist_heart, xticklabels=137, yticklabels=137, ax=ax[0])
ax[0].set_title('All Sleep Mahalanobis Distance Cross Matrix for Heart Trends')
sns.heatmap(m_dist_calories, xticklabels=137, yticklabels=137, ax=ax[1])
ax[1].set_title('All Sleep Mahalanobis Distance Cross Matrix for Calories Trends')
Out[74]:
Text(0.5, 1.0, 'All Sleep Mahalanobis Distance Cross Matrix for Calories Trends')
In [77]:
fig, ax = plt.subplots(1, 2, figsize=(15, 5))
sns.heatmap(l1_dist_heart, xticklabels=137, yticklabels=137, ax=ax[0])
ax[0].set_title('L1 Norm Distance Cross Matrix for Heart Trends')
sns.heatmap(l1_dist_calories, xticklabels=137, yticklabels=137, ax=ax[1])
ax[1].set_title('L1 Norm Distance Cross Matrix for Calories Trends')
Out[77]:
Text(0.5, 1.0, 'L1 Norm Distance Cross Matrix for Calories Trends')
In [76]:
fig, ax = plt.subplots(1, 2, figsize=(15, 5))
sns.heatmap(cor_dist_heart, xticklabels=137, yticklabels=137, ax=ax[0])
ax[0].set_title('Correlation Cross Matrix for Heart Trends')
sns.heatmap(cor_dist_calories, xticklabels=137, yticklabels=137, ax=ax[1])
ax[1].set_title('Correlation Cross Matrix for Calories Trends')
Out[76]:
Text(0.5, 1.0, 'Correlation Cross Matrix for Calories Trends')

Activity Percentages

In this section of the notebook we aggregate the activity labels of a person from minute level to percentage level

In [ ]:
# Constructing a histogram plot for the different activity level percentages.
fig, ax = plt.subplots(2, 2, figsize=(15, 10))
sns.distplot(activity_percentages[:, 0], ax = ax[0, 0])
ax[0, 0].set_xlabel('% Sedentary Activity')
ax[0, 0].set_ylabel('Frequency')
ax[0, 0].set_title('% Sedentary Activity Histogram')

sns.distplot(activity_percentages[:, 1], ax = ax[0, 1])
ax[0, 1].set_xlabel('% Light Activity')
ax[0, 1].set_ylabel('Frequency')
ax[0, 1].set_title('% Light Activity Histogram')

sns.distplot(activity_percentages[:, 2], ax = ax[1, 0])
ax[1, 0].set_xlabel('% Moderate Activity')
ax[1, 0].set_ylabel('Frequency')
ax[1, 0].set_title('% Moderate Activity Histogram')

sns.distplot(activity_percentages[:, 3], ax = ax[1, 1])
ax[1, 1].set_xlabel('% Vigorous Activity')
ax[1, 1].set_ylabel('Frequency')
ax[1, 1].set_title('% Vigorous Activity Histogram')
In [ ]:
# Constructing a histogram plot for the different activity level percentages visualizing with respect to the good sleep label
fig, ax = plt.subplots(2, 2, figsize=(15, 10))
sns.distplot(activity_percentages[~final_sleep_labels, 0], ax = ax[0, 0], color='red', label='Poor Sleep')
sns.distplot(activity_percentages[final_sleep_labels, 0], ax = ax[0, 0], color='green', label='Good Sleep')
ax[0, 0].set_xlabel('% Sedentary Activity')
ax[0, 0].set_ylabel('Frequency')
ax[0, 0].set_title('% Sedentary Activity Histogram')
ax[0, 0].legend()

sns.distplot(activity_percentages[~final_sleep_labels, 1], ax = ax[0, 1], color='red', label='Poor Sleep')
sns.distplot(activity_percentages[final_sleep_labels, 1], ax = ax[0, 1], color='green', label='Good Sleep')
ax[0, 1].set_xlabel('% Light Activity')
ax[0, 1].set_ylabel('Frequency')
ax[0, 1].set_title('% Light Activity Histogram')
ax[0, 1].legend()

sns.distplot(activity_percentages[~final_sleep_labels, 2], ax = ax[1, 0], color='red', label='Poor Sleep')
sns.distplot(activity_percentages[final_sleep_labels, 2], ax = ax[1, 0], color='green', label='Good Sleep')
ax[1, 0].set_xlabel('% Moderate Activity')
ax[1, 0].set_ylabel('Frequency')
ax[1, 0].set_title('% Moderate Activity Histogram')
ax[1, 0].legend()

sns.distplot(activity_percentages[~final_sleep_labels, 3], ax = ax[1, 1], color='red', label='Poor Sleep')
sns.distplot(activity_percentages[final_sleep_labels, 3], ax = ax[1, 1], color='green', label='Good Sleep')
ax[1, 1].set_xlabel('% Vigorous Activity')
ax[1, 1].set_ylabel('Frequency')
ax[1, 1].set_title('% High Activity Histogram')
ax[1, 1].legend()

Clustering

In this section of the notebook we apply different clustering techniques on the data that we have got and see what are the different recipes

In [18]:
num_master_clusters = 4
num_activity_clusters = 12

K-Means - Euclidean

Here we apply K-Means on the data with euclidean (L-2 Norm) as the distance metric

Getting the Best Model

In [118]:
kmeans_mod = get_purest_clustering_model(lambda num_clusters: KMeans(num_clusters), reduced_heart_trends, final_sleep_labels)

Fitting the Model

In [124]:
# Set the seed so that get the same clustering everytime
# random.seed(2)
# np.random.seed(1000)
# Performing the Clustering
kmeans_mod = KMeans(n_clusters=12)
kmeans_mod.fit(reduced_heart_trends)
cluster_assignments = kmeans_mod.predict(reduced_heart_trends)
sil_score = silhouette_score(reduced_heart_trends, cluster_assignments)
print(kmeans_mod.n_clusters, sil_score)
np.unique(cluster_assignments, return_counts=True)
12 0.06605007818500935
Out[124]:
(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11]),
 array([12, 14, 17, 35, 19, 31, 23, 16, 21, 22, 18, 44], dtype=int64))
In [126]:
get_all_clusters_sleep_purity(cluster_assignments, final_sleep_labels)
Out[126]:
0.5875023880056106
In [127]:
# Update the number of activity clusters based on the minimum amount of records assigned to a cluster
num_activity_clusters = min(num_activity_clusters, *(np.unique(cluster_assignments, return_counts=True)[1]))
print('Updated Number of activity clusters:', num_activity_clusters)
Updated Number of activity clusters: 12
In [98]:
# Visualizing the number of points in each cluster
sns.distplot(cluster_assignments, kde=False)
Out[98]:
<matplotlib.axes._subplots.AxesSubplot at 0x25396df0a58>

Visualization of Clusters

In [105]:
# Simple Cluster Visualization
pca_mod = PCA(2)
pca_heart = pca_mod.fit_transform(reduced_heart_trends)
plt.figure(figsize=(7, 5))
# sns.scatterplot(pca_heart[:, 0], pca_heart[:, 1], hue=cluster_assignments, style=cluster_assignments)
sns.scatterplot(pca_heart[:, 0], pca_heart[:, 1], hue=cluster_assignments)#, size=cluster_assignments)
plt.xlabel('PCA Dim 1')
plt.ylabel('PCA Dim 2')
plt.title('Visualization of Clusters')
# plt.legend([f'Cluster: {i+1}' for i in range(4)])
plt.legend([])
Out[105]:
<matplotlib.legend.Legend at 0x25399f22860>
In [103]:
# Cluster Visualization based on Sleep Efficiency
pca_mod = PCA(2)
pca_heart = pca_mod.fit_transform(reduced_heart_trends)
plt.figure(figsize=(7, 5))
sns.scatterplot(pca_heart[:, 0], pca_heart[:, 1], size=final_sleep_labels, hue=cluster_assignments)
plt.xlabel('PCA Dim 1')
plt.ylabel('PCA Dim 2')
plt.title('Clusters Visualized')
plt.legend([])
Out[103]:
<matplotlib.legend.Legend at 0x25399e5f320>
In [ ]:
fig, ax = plt.subplots(1, 2, figsize=(15, 7))

# Simple Cluster Visualization
pca_mod = PCA(2)
pca_heart = pca_mod.fit_transform(reduced_heart_trends)
sns.scatterplot(pca_heart[:, 0], pca_heart[:, 1], hue=cluster_assignments, style=cluster_assignments, ax=ax[0])
ax[0].set_xlabel('PCA Dim 1')
ax[0].set_ylabel('PCA Dim 2')
ax[0].set_title('Clusters Visualized')
ax[0].legend([f'Cluster: {i+1}' for i in range(4)])

# Cluster Visualization based on Sleep Efficiency
pca_mod = PCA(2)
pca_heart = pca_mod.fit_transform(reduced_heart_trends)
sns.scatterplot(pca_heart[:, 0], pca_heart[:, 1], hue=final_sleep_labels, style=cluster_assignments, ax=ax[1])
ax[1].set_xlabel('PCA Dim 1')
ax[1].set_ylabel('PCA Dim 2')
ax[1].set_title('Clusters Visualized')
ax[1].legend([])

Cluster Purity

Finding cluster purity based on the sleep labels

In [ ]:
# Clustering Purity is defined by ratio of dominant class of sleep label instance in the cluster 
# to total number of instances in the cluster
for master_cluster_num in range(len(kmeans_mod.cluster_centers_)):
    cluster_sleep_labels = final_sleep_labels[cluster_assignments == master_cluster_num]
    pos_sleep_label_purity = sum(cluster_sleep_labels) / cluster_sleep_labels.shape[0]
    print(f'Cluster Number: {master_cluster_num}, Purity:', max(pos_sleep_label_purity, 1 - pos_sleep_label_purity))

Sub-Clustering on Activity Data

In [128]:
sub_clusters = activity_percentage_clusterer(KMeans(n_clusters=num_activity_clusters), cluster_assignments, activity_percentages)
In [107]:
# Sanity Check for the number of points in each cluster
print(np.unique(cluster_assignments, return_counts=True))
for sub_cluster in sub_clusters:
    print(sub_cluster.shape)
(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23]), array([ 4, 34, 13,  9, 15,  7,  4, 26,  7,  4,  5,  9,  7,  7, 15, 17,  6,
        2, 27,  4, 25,  8,  3, 14], dtype=int64))
(4,)
(34,)
(13,)
(9,)
(15,)
(7,)
(4,)
(26,)
(7,)
(4,)
(5,)
(9,)
(7,)
(7,)
(15,)
(17,)
(6,)
(2,)
(27,)
(4,)
(25,)
(8,)
(3,)
(14,)
Cluster Purity in each subcluster
In [108]:
# Clustering Purity is defined by ratio of dominant class of sleep label instance in the cluster
# to total number of instances in the cluster
for index, sub_cluster in enumerate(sub_clusters):
    print('Master Cluster:', index+1)
    cluster_sleep_labels = final_sleep_labels[(cluster_assignments == index)]
    for sub_cluster_assignment in range(num_activity_clusters):
        sub_cluster_sleep_labels = cluster_sleep_labels[(sub_cluster==sub_cluster_assignment)]
        try:
            pos_sleep_label_purity = sum(sub_cluster_sleep_labels) / sub_cluster_sleep_labels.shape[0]
            print(f'Sub Cluster Number: {sub_cluster_assignment}, Purity:', max(pos_sleep_label_purity, 0))#, 1 - pos_sleep_label_purity))
        except:
            print(f'Sub Cluster Number: {sub_cluster_assignment}, No Points assigned')
Master Cluster: 1
Sub Cluster Number: 0, Purity: 0.0
Sub Cluster Number: 1, Purity: 0.0
Master Cluster: 2
Sub Cluster Number: 0, Purity: 0.6923076923076923
Sub Cluster Number: 1, Purity: 0.75
Master Cluster: 3
Sub Cluster Number: 0, Purity: 1.0
Sub Cluster Number: 1, Purity: 0.7142857142857143
Master Cluster: 4
Sub Cluster Number: 0, Purity: 0.375
Sub Cluster Number: 1, Purity: 1.0
Master Cluster: 5
Sub Cluster Number: 0, Purity: 0.7142857142857143
Sub Cluster Number: 1, Purity: 0.75
Master Cluster: 6
Sub Cluster Number: 0, Purity: 1.0
Sub Cluster Number: 1, Purity: 0.8
Master Cluster: 7
Sub Cluster Number: 0, Purity: 0.0
Sub Cluster Number: 1, Purity: 0.5
Master Cluster: 8
Sub Cluster Number: 0, Purity: 0.5833333333333334
Sub Cluster Number: 1, Purity: 1.0
Master Cluster: 9
Sub Cluster Number: 0, Purity: 0.16666666666666666
Sub Cluster Number: 1, Purity: 0.0
Master Cluster: 10
Sub Cluster Number: 0, Purity: 1.0
Sub Cluster Number: 1, Purity: 0.5
Master Cluster: 11
Sub Cluster Number: 0, Purity: 0.25
Sub Cluster Number: 1, Purity: 0.0
Master Cluster: 12
Sub Cluster Number: 0, Purity: 0.8571428571428571
Sub Cluster Number: 1, Purity: 0.0
Master Cluster: 13
Sub Cluster Number: 0, Purity: 0.25
Sub Cluster Number: 1, Purity: 0.3333333333333333
Master Cluster: 14
Sub Cluster Number: 0, Purity: 0.25
Sub Cluster Number: 1, Purity: 0.6666666666666666
Master Cluster: 15
Sub Cluster Number: 0, Purity: 0.45454545454545453
Sub Cluster Number: 1, Purity: 0.5
Master Cluster: 16
Sub Cluster Number: 0, Purity: 0.5
Sub Cluster Number: 1, Purity: 0.6
Master Cluster: 17
Sub Cluster Number: 0, Purity: 0.0
Sub Cluster Number: 1, Purity: 0.0
Master Cluster: 18
Sub Cluster Number: 0, Purity: 0.0
Sub Cluster Number: 1, Purity: 0.0
Master Cluster: 19
Sub Cluster Number: 0, Purity: 0.7272727272727273
Sub Cluster Number: 1, Purity: 0.4
Master Cluster: 20
Sub Cluster Number: 0, Purity: 0.3333333333333333
Sub Cluster Number: 1, Purity: 0.0
Master Cluster: 21
Sub Cluster Number: 0, Purity: 0.23529411764705882
Sub Cluster Number: 1, Purity: 0.5
Master Cluster: 22
Sub Cluster Number: 0, Purity: 0.3333333333333333
Sub Cluster Number: 1, Purity: 0.0
Master Cluster: 23
Sub Cluster Number: 0, Purity: 0.0
Sub Cluster Number: 1, Purity: 0.0
Master Cluster: 24
Sub Cluster Number: 0, Purity: 0.3333333333333333
Sub Cluster Number: 1, Purity: 0.2
In [129]:
sleep_recipes = get_good_sleep_recipes(cluster_assignments, sub_clusters, activity_percentages, final_sleep_labels)
sleep_recipes * 720 / 100
Cluster: 0, Sub Cluster: 1, Good Ratio: inf
Cluster: 0, Sub Cluster: 3, Good Ratio: inf
Cluster: 0, Sub Cluster: 8, Good Ratio: inf
Cluster: 0, Sub Cluster: 10, Good Ratio: inf
Cluster: 0, Sub Cluster: 11, Good Ratio: inf
Cluster: 1, Sub Cluster: 1, Good Ratio: inf
Cluster: 2, Sub Cluster: 1, Good Ratio: 3.0
Cluster: 2, Sub Cluster: 3, Good Ratio: inf
Cluster: 2, Sub Cluster: 4, Good Ratio: inf
Cluster: 2, Sub Cluster: 5, Good Ratio: inf
Cluster: 2, Sub Cluster: 8, Good Ratio: inf
Cluster: 2, Sub Cluster: 9, Good Ratio: inf
Cluster: 2, Sub Cluster: 10, Good Ratio: inf
Cluster: 2, Sub Cluster: 11, Good Ratio: inf
Cluster: 3, Sub Cluster: 2, Good Ratio: 2.0
Cluster: 3, Sub Cluster: 7, Good Ratio: 2.0
Cluster: 3, Sub Cluster: 8, Good Ratio: inf
Cluster: 3, Sub Cluster: 10, Good Ratio: inf
Cluster: 4, Sub Cluster: 3, Good Ratio: inf
Cluster: 4, Sub Cluster: 4, Good Ratio: inf
Cluster: 5, Sub Cluster: 0, Good Ratio: inf
Cluster: 5, Sub Cluster: 2, Good Ratio: 2.0
Cluster: 5, Sub Cluster: 4, Good Ratio: inf
Cluster: 5, Sub Cluster: 5, Good Ratio: inf
Cluster: 5, Sub Cluster: 6, Good Ratio: inf
Cluster: 5, Sub Cluster: 8, Good Ratio: 2.0
Cluster: 5, Sub Cluster: 9, Good Ratio: inf
Cluster: 5, Sub Cluster: 10, Good Ratio: inf
Cluster: 5, Sub Cluster: 11, Good Ratio: inf
Cluster: 6, Sub Cluster: 0, Good Ratio: 2.0
Cluster: 6, Sub Cluster: 4, Good Ratio: inf
Cluster: 6, Sub Cluster: 5, Good Ratio: inf
Cluster: 6, Sub Cluster: 7, Good Ratio: inf
Cluster: 6, Sub Cluster: 9, Good Ratio: inf
Cluster: 6, Sub Cluster: 11, Good Ratio: inf
Cluster: 7, Sub Cluster: 8, Good Ratio: inf
Cluster: 8, Sub Cluster: 0, Good Ratio: 2.0
Cluster: 8, Sub Cluster: 1, Good Ratio: inf
Cluster: 8, Sub Cluster: 2, Good Ratio: inf
Cluster: 8, Sub Cluster: 4, Good Ratio: inf
Cluster: 8, Sub Cluster: 5, Good Ratio: 2.0
Cluster: 8, Sub Cluster: 6, Good Ratio: 2.0
Cluster: 8, Sub Cluster: 7, Good Ratio: inf
Cluster: 8, Sub Cluster: 8, Good Ratio: inf
Cluster: 8, Sub Cluster: 9, Good Ratio: inf
Cluster: 9, Sub Cluster: 2, Good Ratio: inf
Cluster: 9, Sub Cluster: 4, Good Ratio: inf
Cluster: 9, Sub Cluster: 5, Good Ratio: inf
Cluster: 9, Sub Cluster: 6, Good Ratio: inf
Cluster: 9, Sub Cluster: 9, Good Ratio: inf
Cluster: 10, Sub Cluster: 2, Good Ratio: inf
Cluster: 10, Sub Cluster: 5, Good Ratio: inf
Cluster: 10, Sub Cluster: 6, Good Ratio: inf
Cluster: 11, Sub Cluster: 0, Good Ratio: inf
Cluster: 11, Sub Cluster: 4, Good Ratio: 2.0
Cluster: 11, Sub Cluster: 6, Good Ratio: inf
Cluster: 11, Sub Cluster: 8, Good Ratio: inf
Cluster: 11, Sub Cluster: 9, Good Ratio: inf
Cluster: 11, Sub Cluster: 10, Good Ratio: 4.0
Cluster: 11, Sub Cluster: 11, Good Ratio: inf
Out[129]:
array([[5.6204999e+02, 1.1401875e+02, 2.3006250e+01, 2.0995312e+01],
       [5.9084998e+02, 9.6974998e+01, 1.8000000e+01, 1.3999219e+01],
       [4.6485001e+02, 2.1802499e+02, 2.4004688e+01, 1.3000781e+01],
       [5.6115002e+02, 1.4298750e+02, 6.0011721e+00, 9.9984379e+00],
       [5.6879999e+02, 1.3905000e+02, 3.0005860e+00, 9.0000000e+00],
       [2.9295001e+02, 1.5401250e+02, 1.5097501e+02, 1.2195000e+02],
       [5.2379999e+02, 1.8866251e+02, 6.9996095e+00, 3.3332521e-01],
       [5.5440002e+02, 1.6560001e+02, 0.0000000e+00, 0.0000000e+00],
       [5.8184998e+02, 1.0597500e+02, 9.9984379e+00, 2.1993750e+01],
       [5.7600000e+02, 1.4400000e+02, 0.0000000e+00, 0.0000000e+00],
       [5.7104999e+02, 1.0800000e+02, 2.6001562e+01, 1.5004687e+01],
       [5.3820001e+02, 1.5997501e+02, 8.0015621e+00, 1.3999219e+01],
       [5.3279999e+02, 1.7100000e+02, 1.3999219e+01, 2.0003905e+00],
       [5.9400000e+02, 1.1401875e+02, 1.0996875e+01, 1.0001953e+00],
       [7.1729999e+02, 2.4996095e+00, 0.0000000e+00, 0.0000000e+00],
       [4.1804999e+02, 2.9070001e+02, 1.1503125e+01, 0.0000000e+00],
       [3.8700000e+02, 1.5705000e+02, 5.7993752e+01, 1.1801250e+02],
       [6.6015002e+02, 6.0018749e+01, 0.0000000e+00, 0.0000000e+00],
       [4.4189999e+02, 2.1003751e+02, 3.0009375e+01, 3.7996876e+01],
       [6.0615002e+02, 7.4981247e+01, 4.9992189e+00, 3.4003124e+01],
       [5.9129999e+02, 1.2138750e+02, 3.3328125e+00, 4.0007811e+00],
       [7.0829999e+02, 1.1503125e+01, 0.0000000e+00, 0.0000000e+00],
       [5.2334998e+02, 1.9361250e+02, 3.3328125e+00, 0.0000000e+00],
       [6.4800000e+02, 7.2000000e+01, 0.0000000e+00, 0.0000000e+00],
       [4.2997501e+02, 2.9002499e+02, 0.0000000e+00, 0.0000000e+00],
       [5.5754999e+02, 1.3905000e+02, 7.4953127e+00, 1.5750000e+01],
       [5.9309998e+02, 9.6018753e+01, 1.6003124e+01, 1.5004687e+01],
       [5.3954999e+02, 1.7145000e+02, 2.4996095e+00, 6.5003905e+00],
       [5.8184998e+02, 1.1401875e+02, 1.8000000e+01, 6.0011721e+00],
       [4.5810001e+02, 2.1948750e+02, 2.2007812e+01, 2.0503124e+01],
       [6.3809998e+02, 8.2012497e+01, 0.0000000e+00, 0.0000000e+00],
       [5.4090002e+02, 1.7898750e+02, 0.0000000e+00, 0.0000000e+00],
       [5.4900000e+02, 1.0102500e+02, 1.3000781e+01, 5.7009377e+01],
       [5.8409998e+02, 1.1700000e+02, 1.6003124e+01, 3.0005860e+00],
       [5.5709998e+02, 1.4197501e+02, 1.0504687e+01, 1.0497656e+01],
       [4.4392499e+02, 1.6200000e+02, 4.8993752e+01, 6.5025002e+01],
       [6.1829999e+02, 9.6974998e+01, 3.0005860e+00, 1.5002930e+00],
       [4.7879999e+02, 2.4097501e+02, 0.0000000e+00, 0.0000000e+00],
       [5.5304999e+02, 1.6143750e+02, 4.3347654e+00, 6.6665041e-01],
       [4.2502499e+02, 2.7607501e+02, 1.6003124e+01, 3.0005860e+00],
       [6.3945001e+02, 8.0493752e+01, 0.0000000e+00, 0.0000000e+00],
       [6.0209998e+02, 1.1002500e+02, 7.5023437e+00, 5.0009763e-01],
       [5.0489999e+02, 2.1498750e+02, 0.0000000e+00, 0.0000000e+00],
       [6.6015002e+02, 6.0018749e+01, 0.0000000e+00, 0.0000000e+00],
       [5.9534998e+02, 1.2453750e+02, 0.0000000e+00, 0.0000000e+00],
       [4.8510001e+02, 1.9597501e+02, 3.4987499e+01, 4.0007811e+00],
       [6.1829999e+02, 8.4037498e+01, 1.0504687e+01, 7.4953127e+00],
       [5.5484998e+02, 1.4647501e+02, 9.2531252e+00, 9.2531252e+00],
       [4.9410001e+02, 2.2601250e+02, 0.0000000e+00, 0.0000000e+00],
       [5.1795001e+02, 1.5997501e+02, 9.0000000e+00, 3.2990623e+01],
       [4.6620001e+02, 2.2702499e+02, 1.0996875e+01, 1.6003124e+01],
       [4.2097501e+02, 2.9002499e+02, 4.9992189e+00, 4.0007811e+00],
       [5.4315002e+02, 1.5401250e+02, 1.3999219e+01, 9.0000000e+00],
       [6.5384998e+02, 6.6318748e+01, 0.0000000e+00, 0.0000000e+00],
       [6.1829999e+02, 1.0175625e+02, 0.0000000e+00, 0.0000000e+00],
       [5.3009998e+02, 1.9001250e+02, 0.0000000e+00, 0.0000000e+00],
       [6.0479999e+02, 8.8987503e+01, 1.0996875e+01, 1.5004687e+01],
       [5.3684998e+02, 1.2701250e+02, 2.3006250e+01, 3.2990623e+01],
       [6.3720001e+02, 8.0437500e+01, 1.7499024e+00, 5.0009763e-01],
       [5.9895001e+02, 1.2116250e+02, 0.0000000e+00, 0.0000000e+00]],
      dtype=float32)
In [132]:
for i, sleep_recipe in enumerate(sleep_recipes):
    plt.figure(i)
    plt.bar(['Sedentary', 'Light', 'Moderate', 'Vigorous'], (sleep_recipe * 720 / 100))
    plt.ylabel('Minutes')
    plt.title('Activity Recipes for Sleep')

K-Means - DTW

Here we apply K-Means on the data with Dynamic Time Wrapping (DTW) as the distance metric

In [ ]:
num_activity_clusters = 2

Fitting the Model

In [ ]:
clusterer = get_best_clustering_model(lambda num_clusters: TimeSeriesKMeans(num_clusters, metric='dtw', max_iter=50), 
                                       reduced_heart_trends, cluster_range=range(2, 3))
In [ ]:
clusterer
In [ ]:
clusterer.labels_
In [ ]:
%%time
# Setting the seed
clusterer.fit(reduced_heart_trends)
cluster_assignments = clusterer.labels_
sil_score = silhouette_score(reduced_heart_trends, cluster_assignments)
print(clusterer.n_clusters, sil_score)
np.unique(cluster_assignments, return_counts=True)
In [ ]:
print(np.unique(cluster_assignments, return_counts=True))
In [ ]:
# Update the number of activity clusters based on the minimum amount of records assigned to a cluster
num_activity_clusters = min(num_activity_clusters, *(np.unique(cluster_assignments, return_counts=True)[1]))
print('Updated Number of activity clusters:', num_activity_clusters)
In [ ]:
# Visualizing the number of points in each cluster
sns.distplot(cluster_assignments, kde=False)

Visualization of Clusters

In [ ]:
# Simple Cluster Visualization
pca_mod = PCA(2)
pca_heart = pca_mod.fit_transform(reduced_heart_trends)
plt.figure(figsize=(7, 5))
sns.scatterplot(pca_heart[:, 0], pca_heart[:, 1], hue=cluster_assignments, style=cluster_assignments)
plt.xlabel('PCA Dim 1')
plt.ylabel('PCA Dim 2')
plt.title('Clusters Visualized')
plt.legend([f'Cluster: {i+1}' for i in range(4)])
In [ ]:
# Cluster Visualization based on Sleep Efficiency
pca_mod = PCA(2)
pca_heart = pca_mod.fit_transform(reduced_heart_trends)
plt.figure(figsize=(7, 5))
sns.scatterplot(pca_heart[:, 0], pca_heart[:, 1], hue=final_sleep_labels, style=cluster_assignments)
plt.xlabel('PCA Dim 1')
plt.ylabel('PCA Dim 2')
plt.title('Clusters Visualized')
plt.legend([])
In [ ]:
fig, ax = plt.subplots(1, 2, figsize=(15, 7))

# Simple Cluster Visualization
pca_mod = PCA(2)
pca_heart = pca_mod.fit_transform(reduced_heart_trends)
sns.scatterplot(pca_heart[:, 0], pca_heart[:, 1], hue=cluster_assignments, style=cluster_assignments, ax=ax[0])
ax[0].set_xlabel('PCA Dim 1')
ax[0].set_ylabel('PCA Dim 2')
ax[0].set_title('Clusters Visualized')
ax[0].legend([f'Cluster: {i+1}' for i in range(4)])

# Cluster Visualization based on Sleep Efficiency
pca_mod = PCA(2)
pca_heart = pca_mod.fit_transform(reduced_heart_trends)
sns.scatterplot(pca_heart[:, 0], pca_heart[:, 1], hue=final_sleep_labels, style=cluster_assignments, ax=ax[1])
ax[1].set_xlabel('PCA Dim 1')
ax[1].set_ylabel('PCA Dim 2')
ax[1].set_title('Clusters Visualized')
ax[1].legend([])

Cluster Purity

Finding cluster purity based on the sleep labels

In [ ]:
# Clustering Purity is defined by ratio of dominant class of sleep label instance in the cluster 
# to total number of instances in the cluster
for master_cluster_num in np.unique(cluster_assignments):
    cluster_sleep_labels = final_sleep_labels[cluster_assignments == master_cluster_num]
    pos_sleep_label_purity = sum(cluster_sleep_labels) / cluster_sleep_labels.shape[0]
    print(f'Cluster Number: {master_cluster_num}, Purity:', max(pos_sleep_label_purity, 1 - pos_sleep_label_purity))

Sub-Clustering on Activity Data

In [ ]:
sub_clusters = activity_percentage_clusterer(TimeSeriesKMeans(num_activity_clusters, metric='dtw', max_iter=50), cluster_assignments, activity_percentages)
In [ ]:
# Sanity Check for the number of points in each cluster
print(np.unique(cluster_assignments, return_counts=True))
for sub_cluster in sub_clusters:
    print(sub_cluster.shape)
Cluster Purity in each subcluster
In [ ]:
# Clustering Purity is defined by ratio of dominant class of sleep label instance in the cluster
# to total number of instances in the cluster
for index, sub_cluster in enumerate(sub_clusters):
    print('Master Cluster:', index+1)
    cluster_sleep_labels = final_sleep_labels[(cluster_assignments == index)]
    for sub_cluster_assignment in range(num_activity_clusters):
        sub_cluster_sleep_labels = cluster_sleep_labels[(sub_cluster==sub_cluster_assignment)]
        try:
            pos_sleep_label_purity = sum(sub_cluster_sleep_labels) / sub_cluster_sleep_labels.shape[0]
            print(f'Sub Cluster Number: {sub_cluster_assignment}, Purity:', max(pos_sleep_label_purity, 1 - pos_sleep_label_purity))
        except:
            print(f'Sub Cluster Number: {sub_cluster_assignment}, No Points assigned')
In [ ]:
sleep_recipes = get_good_sleep_recipes(cluster_assignments, sub_clusters, activity_percentages, final_sleep_labels)
sleep_recipes

K-Means - KL Divergence

Here we apply K-Means on the data with K-L Divergence as the distance metric

Defining the distance function using the K-L Divergence

In [19]:
def k_l_distance(x, y):
    return (entropy(x, y) + entropy(y, x))/ 2
In [81]:
kl_dist_heart = cdist(ordered_heart_trends, ordered_heart_trends, metric=k_l_distance)
kl_dist_calories = cdist(ordered_calories_trends, ordered_calories_trends, metric=k_l_distance)
fig, ax = plt.subplots(1, 2, figsize=(15, 5))
sns.heatmap(kl_dist_heart, xticklabels=137, yticklabels=137, ax=ax[0])
ax[0].set_title('K-L Divergence Cross Matrix for Heart Trends')
sns.heatmap(kl_dist_calories, xticklabels=137, yticklabels=137, ax=ax[1])
ax[1].set_title('K-L Divergence Cross Matrix for Calories Trends')
Out[81]:
Text(0.5, 1.0, 'K-L Divergence Cross Matrix for Calories Trends')

Best Model

In [20]:
kl_best_mod = get_purest_clustering_model(lambda num_clusters: KL_Kmeans(num_clusters), reduced_heart_trends, 
                                          final_sleep_labels)
kmeans: X (272, 72)  centres (2, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDA58>>
kmeans: 5 iterations  cluster sizes: [204  68]
kmeans: X (272, 72)  centres (3, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDAC8>>
kmeans: 7 iterations  cluster sizes: [141  56  75]
kmeans: X (272, 72)  centres (4, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDB38>>
kmeans: 15 iterations  cluster sizes: [56 57 73 86]
kmeans: X (272, 72)  centres (5, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDA58>>
kmeans: 6 iterations  cluster sizes: [77 17 42 70 66]
kmeans: X (272, 72)  centres (6, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDAC8>>
kmeans: 10 iterations  cluster sizes: [66 57 33 59 32 25]
kmeans: X (272, 72)  centres (7, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDB38>>
kmeans: 8 iterations  cluster sizes: [35 62 58  7 71 21 18]
kmeans: X (272, 72)  centres (8, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDA58>>
kmeans: 9 iterations  cluster sizes: [49 13 12 28 35 44 55 36]
kmeans: X (272, 72)  centres (2, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDA58>>
kmeans: 8 iterations  cluster sizes: [175  97]
kmeans: X (272, 72)  centres (3, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDB38>>
kmeans: 5 iterations  cluster sizes: [129  56  87]
kmeans: X (272, 72)  centres (4, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDAC8>>
kmeans: 5 iterations  cluster sizes: [46 91 56 79]
kmeans: X (272, 72)  centres (5, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDA58>>
kmeans: 15 iterations  cluster sizes: [54 47 82 48 41]
kmeans: X (272, 72)  centres (6, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDB38>>
kmeans: 6 iterations  cluster sizes: [ 91 104  12  27   2  36]
kmeans: X (272, 72)  centres (7, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDAC8>>
kmeans: 14 iterations  cluster sizes: [60 44 26 32 33 49 28]
kmeans: X (272, 72)  centres (8, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDA58>>
kmeans: 8 iterations  cluster sizes: [12 37 41 54 49 50 25  4]
kmeans: X (272, 72)  centres (9, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDB38>>
kmeans: 9 iterations  cluster sizes: [11  2 64 24 50 53 35 19 14]
kmeans: X (272, 72)  centres (10, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDAC8>>
kmeans: 9 iterations  cluster sizes: [11 28 49 34  8 49 30 10 39 14]
kmeans: X (272, 72)  centres (11, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDB38>>
kmeans: 10 iterations  cluster sizes: [31 10 27 56 34 31  8 10 32 22 11]
kmeans: X (272, 72)  centres (12, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDAC8>>
kmeans: 8 iterations  cluster sizes: [86 20 10 25  2 56  8 13  4 15 30  3]
kmeans: X (272, 72)  centres (13, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDB38>>
kmeans: 6 iterations  cluster sizes: [45 48 53  9 36 27  5 19  3  5  1  7 14]
kmeans: X (272, 72)  centres (14, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDAC8>>
kmeans: 11 iterations  cluster sizes: [14 32 10 13 43 16  6 10  4 29 31 26 24 14]
kmeans: X (272, 72)  centres (15, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDB38>>
kmeans: 8 iterations  cluster sizes: [11  2  7 20 10 30 17 19 26 11 22 34 16 19 28]
kmeans: X (272, 72)  centres (16, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDAC8>>
kmeans: 9 iterations  cluster sizes: [11 10 16 10 12  4 22  3  1 66 21 31 41 13 10  1]
kmeans: X (272, 72)  centres (17, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDB38>>
kmeans: 9 iterations  cluster sizes: [13 24  3 12  2  9 21 14  6 28 38 40 26  8 13  6  9]
kmeans: X (272, 72)  centres (18, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDAC8>>
kmeans: 8 iterations  cluster sizes: [12 12 10 40 23 14 10  9 12  4 11 19 24  4 47  7 10  4]
kmeans: X (272, 72)  centres (19, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDB38>>
kmeans: 7 iterations  cluster sizes: [ 4 12 20 18 26  4  2  9 23 32  4  7  8  7 31 26  3 31  5]
kmeans: X (272, 72)  centres (20, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDAC8>>
kmeans: 9 iterations  cluster sizes: [16 31  8 19  4 20 12 10 13 11 44  1 11 17  1  7  6  5 25 11]
kmeans: X (272, 72)  centres (21, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDB38>>
kmeans: 7 iterations  cluster sizes: [27  4 22 12  3 28  5 20 11  7  4 18  6 12 15 19 12 12 11 14 10]
kmeans: X (272, 72)  centres (22, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDAC8>>
kmeans: 8 iterations  cluster sizes: [22  2  2  2  2 14 13  9  9 18  3  1 22  6 27  2 48  4  2 26  1 37]
kmeans: X (272, 72)  centres (23, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDB38>>
kmeans: 8 iterations  cluster sizes: [36 18  1  3 14 20  1  7 15  4  1 10  1  7 25 19 11  2 33  6  1 35  2]
kmeans: X (272, 72)  centres (24, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDAC8>>
kmeans: 9 iterations  cluster sizes: [ 6 11 33 12 13  6  3 21 26  7 14  3 35  1 11 10  6  2  7 21  7  5  6  6]
kmeans: X (272, 72)  centres (25, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDB38>>
kmeans: 8 iterations  cluster sizes: [ 3  5  2 11 44 32  5 16  3  6  6 13  1  8 26  6  2  4  3 15 12 11 20 14
  4]
kmeans: X (272, 72)  centres (26, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDAC8>>
kmeans: 7 iterations  cluster sizes: [ 1  2 16  7  4 16  1  1  6 25 19  5 21  5 28 13  1  1 21 48  1  2 11  7
  1  9]
kmeans: X (272, 72)  centres (27, 72)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B856FDB38>>
kmeans: 8 iterations  cluster sizes: [ 1  4  2  3 11 15  6  5 10 26  1  8 10 27  6  6 12  4  8 21 16  6  7  7
  3 40  7]

Fitting the Model

In [75]:
# Set the seed so that get the same clustering everytime
# random.seed(2)
# np.random.seed(1000)
# Performing the Clustering
# randomcentres = randomsample(reduced_heart_trends, kl_best_mod.get_num_clusters())
randomcentres = randomsample(reduced_heart_trends, 6)
centres, cluster_assignments, dist = kmeans(reduced_heart_trends, randomcentres, metric=k_l_distance, maxiter=200)
sil_score = silhouette_score(reduced_heart_trends, cluster_assignments, metric=k_l_distance)
print(len(centres), sil_score)
np.unique(cluster_assignments, return_counts=True)
kmeans: X (272, 72)  centres (6, 72)  delta=0.001  maxiter=200  metric=<function k_l_distance at 0x0000021B8578DD08>
kmeans: 9 iterations  cluster sizes: [17 65 39 59 57 35]
6 0.15396928136185273
Out[75]:
(array([0, 1, 2, 3, 4, 5], dtype=int64),
 array([17, 65, 39, 59, 57, 35], dtype=int64))
In [76]:
get_all_clusters_sleep_purity(cluster_assignments, final_sleep_labels, measure='gini')
Out[76]:
0.5811418085868727
In [77]:
# Update the number of activity clusters based on the minimum amount of records assigned to a cluster
num_activity_clusters = min(num_activity_clusters, *(np.unique(cluster_assignments, return_counts=True)[1]))
print('Updated Number of activity clusters:', num_activity_clusters)
Updated Number of activity clusters: 9
In [78]:
# Visualizing the number of points in each cluster
sns.distplot(cluster_assignments, kde=False)
Out[78]:
<matplotlib.axes._subplots.AxesSubplot at 0x21b94af6080>

Visualization of Clusters

In [37]:
# Simple Cluster Visualization
pca_mod = PCA(2)
pca_heart = pca_mod.fit_transform(reduced_heart_trends)
plt.figure(figsize=(7, 5))
sns.scatterplot(pca_heart[:, 0], pca_heart[:, 1], hue=cluster_assignments)#, style=cluster_assignments)
plt.xlabel('PCA Dim 1')
plt.ylabel('PCA Dim 2')
plt.title('Clusters Visualized')
# plt.legend([f'Cluster: {i+1}' for i in range(8)])
plt.legend([])
Out[37]:
<matplotlib.legend.Legend at 0x21b88dc5358>
In [71]:
# Cluster Visualization based on Sleep Efficiency
pca_mod = PCA(2)
pca_heart = pca_mod.fit_transform(reduced_heart_trends)
plt.figure(figsize=(7, 5))
sns.scatterplot(pca_heart[final_sleep_labels, 0], pca_heart[final_sleep_labels, 1], marker='X', hue=cluster_assignments[final_sleep_labels])
sns.scatterplot(pca_heart[~final_sleep_labels, 0], pca_heart[~final_sleep_labels, 1], marker='o', hue=cluster_assignments[~final_sleep_labels])#, hue=cluster_assignments)
# plt.scatter(pca_heart[:, 0], pca_heart[:, 1], s=np.array(final_sleep_labels, dtype=np.int)*10, cmap='viridis', c=cluster_assignments)
plt.xlabel('PCA Dim 1')
plt.ylabel('PCA Dim 2')
# plt.colorbar(label='Cluster')
plt.title('Clusters Visualized with Good Sleep Labels')
plt.legend(['Good Sleep', 'Poor Sleep'])
Out[71]:
<matplotlib.legend.Legend at 0x21b94ae25c0>

Cluster Purity

Finding cluster purity based on the sleep labels

In [79]:
# Clustering Purity is defined by ratio of dominant class of sleep label instance in the cluster 
# to total number of instances in the cluster
for master_cluster_num in range(len(centres)):
    cluster_sleep_labels = final_sleep_labels[cluster_assignments == master_cluster_num]
    pos_sleep_label_purity = sum(cluster_sleep_labels) / cluster_sleep_labels.shape[0]
    print(f'Cluster Number: {master_cluster_num}, Purity:', max(pos_sleep_label_purity, 1 - pos_sleep_label_purity))
Cluster Number: 0, Purity: 0.8235294117647058
Cluster Number: 1, Purity: 0.676923076923077
Cluster Number: 2, Purity: 0.8205128205128205
Cluster Number: 3, Purity: 0.6101694915254237
Cluster Number: 4, Purity: 0.7017543859649122
Cluster Number: 5, Purity: 0.5714285714285714

Sub-Clustering on Activity Data

In [80]:
sub_clusters = activity_percentage_clusterer(KL_Kmeans(num_clusters=12), cluster_assignments, activity_percentages)
kmeans: X (17, 4)  centres (12, 4)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B94B5F828>>
kmeans: 4 iterations  cluster sizes: [1 1 1 2 1 2 1 2 2 2 1 1]
kmeans: X (65, 4)  centres (12, 4)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B94B5F828>>
kmeans: 2 iterations  cluster sizes: [7 5 3 4 3 8 8 9 5 5 7 1]
kmeans: X (39, 4)  centres (12, 4)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B94B5F828>>
kmeans: 2 iterations  cluster sizes: [7 3 2 3 1 1 3 6 8 2 2 1]
kmeans: X (59, 4)  centres (12, 4)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B94B5F828>>
kmeans: 2 iterations  cluster sizes: [ 5  2  3  5  2  4  2  8  6  7  4 11]
kmeans: X (57, 4)  centres (12, 4)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B94B5F828>>
kmeans: 6 iterations  cluster sizes: [ 2  1  5  6  5  8  2 15  3  3  5  2]
kmeans: X (35, 4)  centres (12, 4)  delta=0.001  maxiter=100  metric=<bound method KL_Kmeans.k_l_distance of <kmeans_dm.KL_Kmeans object at 0x0000021B94B5F828>>
kmeans: 8 iterations  cluster sizes: [5 5 3 4 3 2 3 2 4 1 2 1]
In [81]:
# Sanity Check for the number of points in each cluster
print(np.unique(cluster_assignments, return_counts=True))
for sub_cluster in sub_clusters:
    print(sub_cluster.shape)
(array([0, 1, 2, 3, 4, 5], dtype=int64), array([17, 65, 39, 59, 57, 35], dtype=int64))
(17,)
(65,)
(39,)
(59,)
(57,)
(35,)
Cluster Purity in each subcluster
In [82]:
# Clustering Purity is defined by ratio of dominant class of sleep label instance in the cluster
# to total number of instances in the cluster
for index, sub_cluster in enumerate(sub_clusters):
    print('Master Cluster:', index+1)
    cluster_sleep_labels = final_sleep_labels[(cluster_assignments == index)]
    for sub_cluster_assignment in range(num_activity_clusters):
        sub_cluster_sleep_labels = cluster_sleep_labels[(sub_cluster==sub_cluster_assignment)]
        try:
            pos_sleep_label_purity = sum(sub_cluster_sleep_labels) / sub_cluster_sleep_labels.shape[0]
            print(f'Sub Cluster Number: {sub_cluster_assignment}, Purity:', max(pos_sleep_label_purity, 1 - pos_sleep_label_purity))
            print(f'Sub Cluster Number: {sub_cluster_assignment}, Good Sleep %:', pos_sleep_label_purity)
        except:
            print(f'Sub Cluster Number: {sub_cluster_assignment}, No Points assigned')
Master Cluster: 1
Sub Cluster Number: 0, Purity: 1.0
Sub Cluster Number: 0, Good Sleep %: 0.0
Sub Cluster Number: 1, Purity: 1.0
Sub Cluster Number: 1, Good Sleep %: 0.0
Sub Cluster Number: 2, Purity: 1.0
Sub Cluster Number: 2, Good Sleep %: 0.0
Sub Cluster Number: 3, Purity: 1.0
Sub Cluster Number: 3, Good Sleep %: 0.0
Sub Cluster Number: 4, Purity: 1.0
Sub Cluster Number: 4, Good Sleep %: 0.0
Sub Cluster Number: 5, Purity: 1.0
Sub Cluster Number: 5, Good Sleep %: 0.0
Sub Cluster Number: 6, Purity: 1.0
Sub Cluster Number: 6, Good Sleep %: 1.0
Sub Cluster Number: 7, Purity: 0.5
Sub Cluster Number: 7, Good Sleep %: 0.5
Sub Cluster Number: 8, Purity: 0.5
Sub Cluster Number: 8, Good Sleep %: 0.5
Master Cluster: 2
Sub Cluster Number: 0, Purity: 0.7142857142857143
Sub Cluster Number: 0, Good Sleep %: 0.2857142857142857
Sub Cluster Number: 1, Purity: 0.8
Sub Cluster Number: 1, Good Sleep %: 0.8
Sub Cluster Number: 2, Purity: 0.6666666666666666
Sub Cluster Number: 2, Good Sleep %: 0.6666666666666666
Sub Cluster Number: 3, Purity: 1.0
Sub Cluster Number: 3, Good Sleep %: 1.0
Sub Cluster Number: 4, Purity: 0.6666666666666666
Sub Cluster Number: 4, Good Sleep %: 0.6666666666666666
Sub Cluster Number: 5, Purity: 0.625
Sub Cluster Number: 5, Good Sleep %: 0.625
Sub Cluster Number: 6, Purity: 0.875
Sub Cluster Number: 6, Good Sleep %: 0.875
Sub Cluster Number: 7, Purity: 0.5555555555555556
Sub Cluster Number: 7, Good Sleep %: 0.5555555555555556
Sub Cluster Number: 8, Purity: 0.6
Sub Cluster Number: 8, Good Sleep %: 0.6
Master Cluster: 3
Sub Cluster Number: 0, Purity: 0.5714285714285714
Sub Cluster Number: 0, Good Sleep %: 0.42857142857142855
Sub Cluster Number: 1, Purity: 1.0
Sub Cluster Number: 1, Good Sleep %: 0.0
Sub Cluster Number: 2, Purity: 1.0
Sub Cluster Number: 2, Good Sleep %: 0.0
Sub Cluster Number: 3, Purity: 1.0
Sub Cluster Number: 3, Good Sleep %: 0.0
Sub Cluster Number: 4, Purity: 1.0
Sub Cluster Number: 4, Good Sleep %: 1.0
Sub Cluster Number: 5, Purity: 1.0
Sub Cluster Number: 5, Good Sleep %: 1.0
Sub Cluster Number: 6, Purity: 1.0
Sub Cluster Number: 6, Good Sleep %: 0.0
Sub Cluster Number: 7, Purity: 0.8333333333333334
Sub Cluster Number: 7, Good Sleep %: 0.16666666666666666
Sub Cluster Number: 8, Purity: 0.875
Sub Cluster Number: 8, Good Sleep %: 0.125
Master Cluster: 4
Sub Cluster Number: 0, Purity: 0.6
Sub Cluster Number: 0, Good Sleep %: 0.6
Sub Cluster Number: 1, Purity: 1.0
Sub Cluster Number: 1, Good Sleep %: 1.0
Sub Cluster Number: 2, Purity: 0.6666666666666667
Sub Cluster Number: 2, Good Sleep %: 0.3333333333333333
Sub Cluster Number: 3, Purity: 0.6
Sub Cluster Number: 3, Good Sleep %: 0.6
Sub Cluster Number: 4, Purity: 0.5
Sub Cluster Number: 4, Good Sleep %: 0.5
Sub Cluster Number: 5, Purity: 0.75
Sub Cluster Number: 5, Good Sleep %: 0.75
Sub Cluster Number: 6, Purity: 1.0
Sub Cluster Number: 6, Good Sleep %: 0.0
Sub Cluster Number: 7, Purity: 0.75
Sub Cluster Number: 7, Good Sleep %: 0.25
Sub Cluster Number: 8, Purity: 0.5
Sub Cluster Number: 8, Good Sleep %: 0.5
Master Cluster: 5
Sub Cluster Number: 0, Purity: 0.5
Sub Cluster Number: 0, Good Sleep %: 0.5
Sub Cluster Number: 1, Purity: 1.0
Sub Cluster Number: 1, Good Sleep %: 0.0
Sub Cluster Number: 2, Purity: 1.0
Sub Cluster Number: 2, Good Sleep %: 1.0
Sub Cluster Number: 3, Purity: 1.0
Sub Cluster Number: 3, Good Sleep %: 1.0
Sub Cluster Number: 4, Purity: 1.0
Sub Cluster Number: 4, Good Sleep %: 1.0
Sub Cluster Number: 5, Purity: 0.5
Sub Cluster Number: 5, Good Sleep %: 0.5
Sub Cluster Number: 6, Purity: 1.0
Sub Cluster Number: 6, Good Sleep %: 1.0
Sub Cluster Number: 7, Purity: 0.5333333333333333
Sub Cluster Number: 7, Good Sleep %: 0.5333333333333333
Sub Cluster Number: 8, Purity: 0.6666666666666666
Sub Cluster Number: 8, Good Sleep %: 0.6666666666666666
Master Cluster: 6
Sub Cluster Number: 0, Purity: 0.6
Sub Cluster Number: 0, Good Sleep %: 0.6
Sub Cluster Number: 1, Purity: 0.8
Sub Cluster Number: 1, Good Sleep %: 0.8
Sub Cluster Number: 2, Purity: 0.6666666666666667
Sub Cluster Number: 2, Good Sleep %: 0.3333333333333333
Sub Cluster Number: 3, Purity: 0.5
Sub Cluster Number: 3, Good Sleep %: 0.5
Sub Cluster Number: 4, Purity: 1.0
Sub Cluster Number: 4, Good Sleep %: 1.0
Sub Cluster Number: 5, Purity: 1.0
Sub Cluster Number: 5, Good Sleep %: 1.0
Sub Cluster Number: 6, Purity: 0.6666666666666667
Sub Cluster Number: 6, Good Sleep %: 0.3333333333333333
Sub Cluster Number: 7, Purity: 0.5
Sub Cluster Number: 7, Good Sleep %: 0.5
Sub Cluster Number: 8, Purity: 0.75
Sub Cluster Number: 8, Good Sleep %: 0.25
In [83]:
sleep_recipes = get_good_sleep_recipes(cluster_assignments, sub_clusters, activity_percentages, final_sleep_labels, good_sleep_ratio=1.)
sleep_recipes
Cluster: 0, Sub Cluster: 6, Good Ratio: inf
Cluster: 0, Sub Cluster: 7, Good Ratio: 1.0
Cluster: 0, Sub Cluster: 8, Good Ratio: 1.0
Cluster: 1, Sub Cluster: 1, Good Ratio: 4.0
Cluster: 1, Sub Cluster: 2, Good Ratio: 2.0
Cluster: 1, Sub Cluster: 3, Good Ratio: inf
Cluster: 1, Sub Cluster: 4, Good Ratio: 2.0
Cluster: 1, Sub Cluster: 5, Good Ratio: 1.6666666666666667
Cluster: 1, Sub Cluster: 6, Good Ratio: 7.0
Cluster: 1, Sub Cluster: 7, Good Ratio: 1.25
Cluster: 1, Sub Cluster: 8, Good Ratio: 1.5
Cluster: 1, Sub Cluster: 9, Good Ratio: 4.0
Cluster: 1, Sub Cluster: 10, Good Ratio: 2.5
Cluster: 1, Sub Cluster: 11, Good Ratio: inf
Cluster: 2, Sub Cluster: 4, Good Ratio: inf
Cluster: 2, Sub Cluster: 5, Good Ratio: inf
Cluster: 3, Sub Cluster: 0, Good Ratio: 1.5
Cluster: 3, Sub Cluster: 1, Good Ratio: inf
Cluster: 3, Sub Cluster: 3, Good Ratio: 1.5
Cluster: 3, Sub Cluster: 4, Good Ratio: 1.0
Cluster: 3, Sub Cluster: 5, Good Ratio: 3.0
Cluster: 3, Sub Cluster: 8, Good Ratio: 1.0
Cluster: 4, Sub Cluster: 0, Good Ratio: 1.0
Cluster: 4, Sub Cluster: 2, Good Ratio: inf
Cluster: 4, Sub Cluster: 3, Good Ratio: inf
Cluster: 4, Sub Cluster: 4, Good Ratio: inf
Cluster: 4, Sub Cluster: 5, Good Ratio: 1.0
Cluster: 4, Sub Cluster: 6, Good Ratio: inf
Cluster: 4, Sub Cluster: 7, Good Ratio: 1.1428571428571428
Cluster: 4, Sub Cluster: 8, Good Ratio: 2.0
Cluster: 4, Sub Cluster: 9, Good Ratio: 2.0
Cluster: 4, Sub Cluster: 10, Good Ratio: 4.0
Cluster: 4, Sub Cluster: 11, Good Ratio: 1.0
Cluster: 5, Sub Cluster: 0, Good Ratio: 1.5
Cluster: 5, Sub Cluster: 1, Good Ratio: 4.0
Cluster: 5, Sub Cluster: 3, Good Ratio: 1.0
Cluster: 5, Sub Cluster: 4, Good Ratio: inf
Cluster: 5, Sub Cluster: 5, Good Ratio: inf
Cluster: 5, Sub Cluster: 7, Good Ratio: 1.0
Cluster: 5, Sub Cluster: 10, Good Ratio: inf
Out[83]:
array([[84.2   , 10.414 ,  0.6943,  4.723 ],
       [77.4   , 17.36  ,  1.25  ,  4.027 ],
       [40.7   , 21.39  , 20.97  , 16.94  ],
       [72.56  , 27.47  ,  0.    ,  0.    ],
       [98.4   ,  1.598 ,  0.    ,  0.    ],
       [90.6   ,  9.375 ,  0.    ,  0.    ],
       [78.06  , 20.62  ,  1.319 ,  0.    ],
       [86.56  , 13.45  ,  0.    ,  0.    ],
       [78.8   , 17.6   ,  1.568 ,  2.045 ],
       [78.7   , 20.1   ,  0.3054,  0.917 ],
       [71.7   , 25.88  ,  0.7866,  1.714 ],
       [81.75  , 15.98  ,  1.841 ,  0.486 ],
       [81.6   , 18.38  ,  0.    ,  0.    ],
       [88.9   , 11.11  ,  0.    ,  0.    ],
       [74.3   , 24.44  ,  1.111 ,  0.1389],
       [75.44  , 21.39  ,  1.944 ,  1.25  ],
       [73.6   , 25.56  ,  0.8335,  0.    ],
       [95.5   ,  4.516 ,  0.    ,  0.    ],
       [82.56  , 17.45  ,  0.    ,  0.    ],
       [62.62  , 19.03  , 10.    ,  8.336 ],
       [58.    , 39.34  ,  1.39  ,  1.25  ],
       [74.7   , 25.3   ,  0.    ,  0.    ],
       [58.47  , 40.28  ,  0.6943,  0.5557],
       [70.4   , 28.28  ,  1.389 ,  0.    ],
       [75.5   , 24.45  ,  0.    ,  0.    ],
       [66.94  , 33.03  ,  0.    ,  0.    ],
       [83.1   , 16.84  ,  0.    ,  0.    ],
       [79.4   , 20.62  ,  0.    ,  0.    ],
       [71.4   , 25.1   ,  2.205 ,  1.3545],
       [83.1   , 13.125 ,  2.223 ,  1.527 ],
       [80.06  , 14.86  ,  2.5   ,  2.57  ],
       [89.2   , 10.836 ,  0.    ,  0.    ],
       [76.25  , 14.03  ,  1.806 ,  7.918 ],
       [78.3   , 19.67  ,  0.8335,  1.157 ],
       [86.75  , 13.266 ,  0.    ,  0.    ],
       [86.94  , 11.805 ,  0.903 ,  0.3472],
       [91.2   ,  8.84  ,  0.    ,  0.    ],
       [80.06  , 14.66  ,  2.848 ,  2.43  ],
       [74.56  , 17.64  ,  3.195 ,  4.582 ],
       [80.1   , 18.81  ,  0.8335,  0.2084]], dtype=float16)
In [84]:
for i, sleep_recipe in enumerate(sleep_recipes):
    plt.figure(i)
    plt.bar(['Sedentary', 'Light', 'Moderate', 'Vigorous'], (sleep_recipe * 720 / 100))
    plt.ylabel('Minutes')
    plt.title('Activity Recipes for Sleep')
C:\Users\Saksham\Anaconda3\lib\site-packages\matplotlib\pyplot.py:514: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  max_open_warning, RuntimeWarning)
In [ ]: